package com.daervin.svc.parser;

import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import com.daervin.svc.helper.FilterHelper;
import org.apache.log4j.Logger;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Selectable;

import java.util.ArrayList;
import java.util.List;

import static com.daervin.svc.common.constants.SourceEnum.PE_DAILY;

/**
 * List-page parser for the {@code PE_DAILY} news source.
 *
 * <p>Reads the nodes selected under the element with id {@code newslist-all},
 * keeps the headlines accepted by {@link FilterHelper#financingTitle(String)},
 * and stores the resulting {@link NewsDTO} list on the page under
 * {@link Constants#PARSER_RESULT}.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class PedailyParser extends RootParser {
    private static final Logger LOGGER = Logger.getLogger(PedailyParser.class);

    /**
     * XPath used to obtain the candidate news nodes.
     * NOTE(review): the expression ends with a trailing '/'; presumably this is meant to
     * select the children of the list container — confirm against the selector engine.
     */
    private static final String NEWS_LIST_XPATH = "//*[@id=\"newslist-all\"]/";

    /** XPath, relative to one news node, of the headline text. */
    private static final String TITLE_XPATH = "//h3/a/text()";

    /** XPath, relative to one news node, of the date tag text. */
    private static final String DATE_TAG_XPATH = "//div[@class='tag']/span/text()";

    /** Literal marker stripped from headlines before filtering. */
    private static final String FLASH_PREFIX = "快讯|";

    /** Headlines containing this marker are always skipped. */
    private static final String DIGEST_MARKER = "投资界24h";

    /** Maximum number of news items published per page. */
    private static final int MAX_NEWS = 3;

    /**
     * Creates a parser for the given URL.
     *
     * @param url passed unchanged to the {@link RootParser} constructor
     */
    public PedailyParser(String url) {
        super(url);
    }

    /**
     * Extracts up to {@link #MAX_NEWS} accepted news items from the list page and
     * stores them on the page under {@link Constants#PARSER_RESULT}.
     *
     * <p>A failure while handling one node is logged and does not prevent the
     * remaining nodes from being processed. The result field is always written,
     * possibly with an empty list.
     *
     * @param page the downloaded list page to read from and publish the result to
     */
    @Override
    public void listProcess(Page page) {
        List<Selectable> newsViewList = page.getHtml().xpath(NEWS_LIST_XPATH).nodes();
        List<NewsDTO> newsList = new ArrayList<>();
        for (Selectable newsView : newsViewList) {
            // The original guarded on a counter that was never incremented, so the
            // cap never applied; bound on the collected size instead.
            if (newsList.size() >= MAX_NEWS) {
                break;
            }
            try {
                NewsDTO news = buildPedailyNews(newsView);
                if (news != null) {
                    newsList.add(news);
                }
            } catch (Exception e) {
                // Boundary catch: one malformed node must not abort the whole page.
                LOGGER.error("PedailyParser error", e);
            }
        }

        page.putField(Constants.PARSER_RESULT, newsList);
    }

    /**
     * Builds a news item from a single list node.
     *
     * @param newsView one node selected from the news list
     * @return the populated item, or {@code null} when the node has no headline,
     *         has no date tag, or its headline is rejected by the filters
     */
    private NewsDTO buildPedailyNews(Selectable newsView) {
        String rawTitle = newsView.xpath(TITLE_XPATH).get();
        String dateTag = newsView.xpath(DATE_TAG_XPATH).get();
        // get() yields null when nothing matches; skip such nodes instead of
        // relying on a NullPointerException being caught upstream.
        if (rawTitle == null || !StringUtils.hasLength(dateTag)) {
            return null;
        }

        // Literal replacement; no regular expression is needed for a fixed marker.
        String title = rawTitle.replace(FLASH_PREFIX, "");
        if (!FilterHelper.financingTitle(title) || title.contains(DIGEST_MARKER)) {
            return null;
        }

        NewsDTO news = new NewsDTO();
        news.setTitle(title);
        news.setDesc("");
        news.setBelongDate(dateTag);
        news.setCategory(PE_DAILY.category);
        news.setAnnouncer(PE_DAILY.announcer);
        news.setLinks("");
        return news;
    }
}
